## Code to create some tables and graphs based off of 
## our cleaned data to respond to reviewers
##
## Jen Wu, September 2022


# Initial settings --------------------------------------------------------
pacman::p_load(tidyverse, xtable, viridis,
               estimatr, stargazer, sandwich,
               lmtest, margins)
theme_set(theme_bw())
my_stars <- c(0.05, 0.01, 0.001)

# Functions ---------------------------------------------------------------

# get table of counts for ideology and party
get_count_table_ideopar <- function(df, race, exclude_mid = TRUE){
  if (exclude_mid){
    df %>% 
      filter(r_party_name != "Independent", r_ideology_name != "Moderate",
             !is.na(r_ideology_name), r_race == race) %>% 
      group_by(r_ideology_name, r_party_name) %>% 
      tally() %>% 
      pivot_wider(names_from = "r_party_name", values_from  = "n") %>% 
      rename(` ` = r_ideology_name)
    
  }
  else{
    df %>% 
      filter(!is.na(r_ideology_name), r_race == race) %>% 
      group_by(r_ideology_name, r_party_name) %>% 
      tally() %>% 
      pivot_wider(names_from = "r_party_name", values_from  = "n") %>% 
      rename(` ` = r_ideology_name)
    
  }
}


# Load in data ------------------------------------------------------------

df_all <-  read_rds("jen_data/cleaned/df_all")



# Party and ideology trends -----------------------------------------------
# table of how many obs per year by race
df_all %>% 
  group_by(year, r_race) %>% 
  tally() %>% 
  ungroup() %>% 
  filter(r_race == "White" | r_race == "Black") %>% 
  pivot_wider(names_from = "r_race", values_from = "n") %>% 
  xtable(label = "tab:num_res_per_year", digits = rep(0,4),
         caption = "Number of Respondents over time by race") %>% 
  print(include.rownames= FALSE,  caption.placement="top") #%>% 
#cat(file = "jen_output/tables/num_res_per_year.tex")

# correlation table of ideology and partisanship, over the years
ideo_par_cor_tab <- df_all %>% 
  group_by(year, r_race) %>% 
  summarise(ideo_party_cor = cor(r_ideology, r_party, 
                                 use = "pairwise.complete.obs")) %>% 
  ungroup() %>% 
  filter(r_race == "White" | r_race == "Black") %>% 
  pivot_wider(names_from = "r_race", values_from = "ideo_party_cor")

# appendix table 1 
ideo_par_cor_tab %>% 
  xtable(label = "tab:ideo_par_cor", digits = c(0, 0, 3, 3),
         caption = "Correlation coefficient for party and ideology over time by race") %>% 
  print(include.rownames= FALSE,  caption.placement="top", type = "html") %>% 
  cat(file = "jen_output/tables/ideo_party_cor_tab.html")


#  ideo and partisanship cor, in graph form
# figure 1
g_ideo_par_cor <- ideo_par_cor_tab %>% 
  ggplot(aes(x = year)) +
  geom_point(aes(y = White)) +
  geom_smooth(aes(y = White), color = "black", se = FALSE) +
  geom_point(aes(y = Black), color= "red", shape=21) +
  geom_smooth(aes(y = Black), color = "red", linetype = "dashed", se = FALSE) +
  labs(x = "Year", y = "Party and ideology correlation")
g_ideo_par_cor

ggsave(g_ideo_par_cor, filename = "jen_output/figures/ideo_party_cor.jpeg",
       height = 8/3, width = 4, units = "in")

# 2 by 2 table of party and ideology, exclude independents and moderates
# OVER ALL YEARS
get_count_table_ideopar(df_all, race = "White") %>% 
  xtable(label = "tab:2by2_w", digits = rep(0, 4),
         caption = "Ideology and party among whites") %>% 
  print(include.rownames= FALSE,  caption.placement="top") %>% 
  cat(file = "jen_output/tables/tab_2by2_whites.tex")

get_count_table_ideopar(df_all, race = "Black") %>% 
  xtable(label = "tab:2by2_b", digits = rep(0, 4),
         caption = "Ideology and party among Blacks") %>% 
  print(include.rownames= FALSE,  caption.placement="top") %>% 
  cat(file = "jen_output/tables/tab_2by2_blacks.tex")

get_count_table_ideopar(df_all, race = "White") %>% 
  xtable(digits = rep(0, 4),
         caption = "Ideology and party among Whites") %>% 
  print(include.rownames= FALSE, type = "html", caption.placement="top") %>% 
  cat(file = "jen_output/tables/tab_2by2_whites.html")

get_count_table_ideopar(df_all, race = "Black") %>% 
  xtable(digits = rep(0, 4),
         caption = "Ideology and party among Blacks") %>% 
  print(include.rownames= FALSE, type = "html", caption.placement="top") %>% 
  cat(file = "jen_output/tables/tab_2by2_blacks.html")

# 3 by 3 table of party and ideology (allows for moderates and independents)
# OVER ALL YEARS
get_count_table_ideopar(df_all, race = "White", exclude_mid = FALSE) %>% 
  xtable(label = "tab:3by3_w", digits = rep(0, 5),
         caption = "Ideology and party among whites") %>% 
  print(include.rownames= FALSE,  caption.placement="top") %>% 
  cat(file = "jen_output/tables/tab_3by3_whites.tex")
get_count_table_ideopar(df_all, race = "Black", exclude_mid = FALSE) %>% 
  xtable(label = "tab:3by3_b", digits = rep(0, 5),
         caption = "Ideology and party among Blacks") %>% 
  print(include.rownames= FALSE,  caption.placement="top") %>% 
  cat(file = "jen_output/tables/tab_3by3_blacks.tex")


# table of party and ideology proportions
df_all %>% 
  filter(!is.na(r_ideology_name), r_race == "Black" | r_race == "White",
         r_ideology_name != "Moderate",
         r_party_name != "Independent") %>% 
  group_by(r_race, r_ideology_name, r_party_name) %>% 
  tally() %>% 
  ungroup() %>% 
  group_by(r_race) %>% 
  mutate(total = sum(n)) %>% 
  ungroup() %>% 
  mutate(prop = n/total) %>% 
  select(-n, -total) %>% 
  pivot_wider(names_from=c("r_ideology_name", "r_party_name"),
              values_from = "prop", names_sep = " ") %>% 
  rename(Race = r_race) %>% 
  xtable(digits = 3, caption = 
           "Proportion of whites and Blacks falling in each ideology-party pair over 1972-2016") %>% 
  print(include.rownames= FALSE, type = "html",  caption.placement="top")  %>% 
  cat(file = "jen_output/tables/tab_prop_ideoparty.html")


# Familiarity scores (ideological awareness) -------------------------------

df_all %>% 
  group_by(year) %>% 
  summarise(across(ends_with("_correct"), ~sum(.))) %>% 
  View()

# all pres years except 1980, 1996, 2000 (cpty not asked)
years_ideo <- seq(1972, 2016, 4)[-8][-7][-3]


# histogram of familiarity scores 
# figure 2
g_famscores_hist <- df_all %>% 
  filter(r_race == "White" | r_race == "Black"
         # subset to years that are not missing any of our ideo measures
         #, year %in% years_ideo
  ) %>% 
  group_by(year, r_race, ideo_awareness) %>% 
  tally() %>% 
  mutate(total = sum(n)) %>% 
  ungroup() %>% 
  mutate(prop = n/total) %>% 
  # start graph
  ggplot() +
  geom_col(aes(x = ideo_awareness, y = prop,
               fill = r_race), color = "black",
           position = position_dodge()) +
  facet_wrap(~year) +
  labs(fill = "Race", x = "Liberal-Conservative Familiarity (0-lowest, 5-highest)",
       y = "Proportion")  +
  scale_x_continuous(breaks = seq(0,5)) +
  scale_fill_manual(values=c("white", "black"))


g_famscores_hist

ggsave(g_famscores_hist, filename = "jen_output/figures/famscores_hist.pdf",
       height = 5, width =8)


# do it pooled
# 
g_famscores_hist_pooled <- df_all %>% 
  filter(!is.na(ideo_awareness), 
         r_race == "White" | r_race == "Black") %>% 
  group_by(r_race, ideo_awareness) %>% 
  tally() %>% 
  mutate(total = sum(n)) %>% 
  ungroup() %>% 
  mutate(prop = n/total) %>% 
  ggplot() +
  geom_col(aes(x = ideo_awareness, y = prop,
               fill = r_race), 
           position = position_dodge()) + 
  labs(fill = "Race", x = "Ideological Awareness (0-lowest, 5 highest)",
       y = "Proportion", title = "1972-2016 (Pooled)") +
  scale_x_continuous(breaks = seq(0,5)) +
  scale_fill_viridis_d(option = "cividis")

g_famscores_hist_pooled
ggsave(g_famscores_hist_pooled, 
       filename = "jen_output/figures/famscores_hist_pooled.pdf",
       height = 4, width = 6)





# Familiarity scores and ideology (high fam) -----------------------------
# ideology of those with "high" familiarity 
df_highfam <- df_all %>% 
  filter(!is.na(ideo_awareness_scaled.center),
         r_ideology_name == "Liberal" | r_ideology_name == "Conservative") %>% 
  group_by(year, r_race) %>% 
  mutate(fam_top = max(ideo_awareness_scaled.center),
         fam_med = median(ideo_awareness_scaled.center)) %>% 
  ungroup() %>% 
  mutate(across(starts_with("fam"), 
                ~ifelse(ideo_awareness_scaled.center >= ., 1, 0))) %>% 
  pivot_longer(starts_with("fam"), names_to = "type",
               values_to = "group") %>% 
  group_by(year, type, r_race, r_party_name, group, r_ideology_name) %>% 
  tally() %>% 
  mutate(freq = n/sum(n)) %>% 
  ungroup()

# make graph with "high fam"
g_highfam <- df_highfam %>% 
  filter(r_ideology_name == "Liberal") %>% 
  ggplot(aes(year, freq, color = factor(group))) +
  geom_point() +
  geom_line() +
  facet_wrap(~type) +
  labs(y = "Percent liberal", x = "Year", 
       color = "Group") 
g_highfam

ggsave(g_highfam, file = "jen_output/figures/highfam.pdf",
       width = 8, height = 5)

# make table with "high fam"
highfam_tab <- df_highfam %>% 
  filter(r_race == "White" | r_race == "Black",
         r_party_name != "Independent") %>% 
  group_by(r_race, r_party_name, type, group, r_ideology_name) %>% 
  summarise(n = sum(n)) %>% 
  mutate(total = sum(n),
         prop = n/total) %>% 
  ungroup() %>% 
  filter(r_party_name == "Democrat" & r_ideology_name == "Liberal"
         | (r_party_name == "Republican" & r_ideology_name == "Conservative")) %>% 
  arrange(type, r_party_name) %>% 
  mutate(category = paste(r_race,r_party_name, r_ideology_name),
         type = case_when(type == "fam_med" & group == 1 ~ ">= median",
                          type == "fam_med" & group == 0 ~ "< median",
                          type == "fam_top" & group == 1 ~ "Only top",
                          type == "fam_top" & group == 0 ~ "Less than top"
                          
         )) %>% 
  select(category, type, n, total, prop) 

highfam_tab %>% 
  xtable(digits = c(rep(0,5), 3),
         caption = "Proportion with 'ideology match' among race, party, and liberal-conservative familiarity score grouping") %>% 
  print(include.rownames= FALSE,  caption.placement="top", type = "html") %>% 
  cat(file = "jen_output/tables/ideo_mismatch1.html")

highfam_tab %>% 
  select(-n, - total) %>% 
  pivot_wider(names_from = type,  values_from = prop) %>% 
  xtable(digits = c(0, 0, rep(3,4)),
         caption = "Proportion with 'ideology match' among race, party, and liberal-conservative familiarity score grouping") %>% 
  print(include.rownames= FALSE,  caption.placement="top", type = "html") %>% 
  cat(file = "jen_output/tables/ideo_mismatch2.html")

df_highfam %>% 
  group_by(type, group, r_ideology_name) %>% 
  summarise(n = sum(n)) %>% 
  mutate(total = sum(n),
         freq = n/total) %>% 
  ungroup() %>% 
  mutate(type = case_when(type == "fam_med" & group == 1 ~ "Greater than median",
                          type == "fam_med" & group == 0 ~ "Less than median",
                          type == "fam_top" & group == 1 ~ "Only top",
                          type == "fam_top" & group == 0 ~ "Less than top"
                          
  )) %>% 
  filter(r_ideology_name == "Liberal") %>% 
  select(-group, -r_ideology_name) %>% 
  xtable(digits = c(0, 0, 3, 3, 3),
         caption = "Proportion liberal among familiarity score ranking") %>% 
  print(include.rownames= FALSE,  caption.placement="top", type = "html") %>% 
  cat(file = "jen_output/tables/highfam.html")




# Familiarity scores and ideology (regressions) ---------------------------

predideo_cov <- c(paste0(c("ideo_awareness_scaled", "ofcrec.index", "ft_blacks",
                           "ft_whites", "ft_big_business", "ft_unions", "ft_hispanics",
                           "ft_middle_class", "ft_gays", "econ_policy.index", 
                           "social_policy.index", "religiosity.index", 
                           "morality.index"), ".center"), "r_age", "r_income",
                  "r_gender", "r_education", "factor(year)")
predideo_cov_simp <- c(paste0(c("ideo_awareness_scaled", "econ_policy.index", 
                                "social_policy.index", "religiosity.index", 
                                "morality.index"), ".center"), "r_age", "r_income",
                       "r_gender", "r_education", "factor(year)")

predideo_cov_all <- c(paste0(c("ideo_awareness_scaled", "ofcrec.index2", "ft_blacks",
                               "ft_whites", "ft_big_business", "ft_unions", "ft_hispanics",
                               "ft_middle_class", "ft_gays", "econ_policy.index", 
                               "social_policy.index", "religiosity.index", 
                               "morality.index"), ".center"), "r_age", "r_income",
                      "r_gender", "r_education", "factor(year)")

for (var in predideo_cov[2:12]){
  print(var)
  table(df_all$year, !is.na(df_all[,var])) |> print()
}

# regression for figure 3 and table b.2
predicting_ideo_2012 <-  lm(formula = paste0("r_ideology~", 
                                             paste(predideo_cov[-18], collapse = "+")),
                            data = df_all,
                            subset = df_all$r_race == "Black",
                            weights = df_all$weight)
summary(predicting_ideo_2012)

predicting_ideo_all <-  lm(formula = paste0("r_ideology~", 
                                            paste(predideo_cov_all, collapse = "+")),
                           data = df_all,
                           subset = df_all$r_race == "Black",
                           weights = df_all$weight)
summary(predicting_ideo_all)

# 2004, 2008, 2012 because of ft_middle_class
predicting_ideo <- lm(formula = paste0("r_ideology~", 
                                       paste(predideo_cov[-2], collapse = "+")),
                      data = df_all,
                      subset = df_all$r_race == "Black",
                      weights = df_all$weight
)
summary(predicting_ideo)

#1992, 1994, 1996, 2004, 2008, 2012, 2016
predicting_ideo_simp <- lm(formula = paste0("r_ideology~", 
                                            paste(predideo_cov_simp, collapse = "+")),
                           data = df_all,
                           subset = df_all$r_race == "Black",
                           weights = df_all$weight
)
summary(predicting_ideo_simp)

# same but with whites
predicting_ideo_2012_w <-  lm(formula = paste0("r_ideology~", 
                                               paste(predideo_cov[-18], collapse = "+")),
                              data = df_all,
                              subset = df_all$r_race == "White",
                              weights = df_all$weight)
summary(predicting_ideo_2012_w)

# 2004, 2008, 2012 because of ft_middle_class
predicting_ideo_w <- lm(formula = paste0("r_ideology~", 
                                         paste(predideo_cov[-2], collapse = "+")),
                        data = df_all,
                        subset = df_all$r_race == "White",
                        weights = df_all$weight
)
summary(predicting_ideo_w)

#1992, 1994, 1996, 2004, 2008, 2012, 2016
predicting_ideo_simp_w <- lm(formula = paste0("r_ideology~", 
                                              paste(predideo_cov_simp, collapse = "+")),
                             data = df_all,
                             subset = df_all$r_race == "White",
                             weights = df_all$weight
)
summary(predicting_ideo_simp_w)

predicting_ideo_all_w <-  lm(formula = paste0("r_ideology~", 
                                              paste(predideo_cov_all, collapse = "+")),
                             data = df_all,
                             subset = df_all$r_race == "White",
                             weights = df_all$weight)
summary(predicting_ideo_all_w)


#covariate labels
predideo_cov_labs <-  c("Liberal-conservative familiarity",
                        "Office recognition (4 ques.)",
                        "Office recognition (3 ques.)",
                        "FT - Blacks",
                        "FT - Whites",
                        "FT - Big business",
                        "FT - Unions",
                        "FT - Hispanics",
                        "FT - Middle class",
                        "FT - Gays and lesbians",
                        "Economic policy",
                        "Social policy",
                        "Religiosity",
                        "Moral traditionalism")

se_ideo_list <- list(coeftest(predicting_ideo_simp, vcov = vcovCL, 
                              cluster = ~year, type = "HC1")[,2],
                     coeftest(predicting_ideo, vcov = vcovCL, 
                              cluster = ~year, type = "HC1")[,2],
                     coeftest(predicting_ideo_2012, vcov = vcovHC, 
                              type = "HC1")[,2],
                     coeftest(predicting_ideo_all, vcov = vcovHC, 
                              type = "HC1")[,2])

se_ideo_list_w <- list(coeftest(predicting_ideo_simp_w, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(predicting_ideo_w, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(predicting_ideo_2012_w, vcov = vcovHC, 
                                type = "HC1")[,2],
                       coeftest(predicting_ideo_all_w, vcov = vcovHC, 
                                type = "HC1")[,2])

stargazer(predicting_ideo_simp, predicting_ideo, predicting_ideo_2012,
          predicting_ideo_all, 
          se = se_ideo_list,
          type = "html",
          dep.var.caption = "",
          covariate.labels = predideo_cov_labs,
          dep.var.labels = "Ideology (Conservative)",
          omit.stat = c("rsq", "f", "ser"),
          label = "tab:lm_main_regs",
          title = "Table B.2: Predicting ideological identification among Black respondents",
          omit = c("r_age",
                   "r_income",
                   "r_education",
                   "r_gender",
                   "year"),
          out = "jen_output/tables/lm_predideo.html")


stargazer(predicting_ideo_simp_w, predicting_ideo_w, predicting_ideo_2012_w,
          predicting_ideo_all_w,
          se = se_ideo_list_w,
          type = "html",
          dep.var.caption = "",
          covariate.labels = predideo_cov_labs,
          dep.var.labels = "Ideology (Conservative)",
          omit.stat = c("rsq", "f", "ser"),
          star.cutoffs = my_stars,
          title = "Table B.3: Predicting ideological identification among white respondents",
          omit = c("r_age",
                   "r_income",
                   "r_education",
                   "r_gender",
                   "year"),
          out = "jen_output/tables/lm_predideo_w.html")

se_ideo_list_all <- c(se_ideo_list,se_ideo_list_w)
stargazer(predicting_ideo_simp, predicting_ideo, predicting_ideo_2012,
          predicting_ideo_all,
          predicting_ideo_simp_w, predicting_ideo_w, predicting_ideo_2012_w,
          predicting_ideo_all_w,
          se = se_ideo_list_all,
          type = "html",
          dep.var.caption = "",
          covariate.labels = predideo_cov_labs,
          dep.var.labels = "Ideology (Conservative)",
          omit.stat = c("rsq", "f", "ser"),
          star.cutoffs = my_stars,
          title = "Table B.2: Predicting ideological identification among Blacks (1-3) and whites (4-6)",
          omit = c("r_age",
                   "r_income",
                   "r_education",
                   "r_gender",
                   "year"),
          out = "jen_output/tables/lm_predideo_all.html")

ideo_aware_lms <- list(predicting_ideo_simp, predicting_ideo, predicting_ideo_2012, 
                       predicting_ideo_all,
                       predicting_ideo_simp_w, predicting_ideo_w, predicting_ideo_2012_w,
                       predicting_ideo_all_w)

ideo_aware_coef <- rep(NA, length(ideo_aware_lms))
for (i in 1:length(ideo_aware_lms)){
  ideo_aware_coef[i] <- ideo_aware_lms[[i]][[1]][2]
}

ideo_aware_se <- rep(NA, length(se_ideo_list_all))
for (i in 1:length(se_ideo_list_all)){
  ideo_aware_se[i] <- se_ideo_list_all[[i]][2]
}

model <- rep(seq(1,4),2)
race <- c(rep("Black Americans",4), rep("White Americans",4))
ideo_aware_df <- data.frame(model, race, 
                            coef = ideo_aware_coef, se = ideo_aware_se) %>% 
  mutate(upr_ci = coef + 1.96 * se,
         lwr_ci = coef - 1.96 * se)

ideo_aware_df$model <- factor(ideo_aware_df$model, 
                              level = seq(4,1))

# FIGURE 3
g_lc_ideo <- ggplot(ideo_aware_df, aes(model,coef)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_errorbar(aes(ymin = lwr_ci, ymax = upr_ci),width = .4) +
  geom_label(aes(label = round(coef,3))) +
  coord_flip() +
  facet_wrap(~race, ncol = 1) +
  labs(x = "Model for predicting ideology (conservative)", y = "Liberal-conservative familiarity coefficient and 95% CI")
g_lc_ideo

ggsave(g_lc_ideo,       
       filename = "jen_output/figures/lc_ideo.pdf",
       height = 6, width = 8)

# Redo main tables with fixed effects -------------------------------------

# main table with year fixed effects (covers years 2012, 2016)
lm_partyideo_rc <- lm(
  r_party ~ 
    ideo_awareness_scaled.center*r_ideology.center +
    race_of_interviewer*r_ideology.center +
    racial_consciousness.center*r_ideology.center +
    religiosity.index +
    morality.index +
    r_age +
    r_income +
    r_gender +
    r_education +
    factor(year),
  data = df_all,
  subset = df_all$r_race == "Black",
  weights = weight)

summary(lm_partyideo_rc)


# main table but no racial consciousness with year fixed effects
# (covers years... )
lm_partyideo <- lm(
  r_party ~ 
    ideo_awareness_scaled.center*r_ideology.center +
    race_of_interviewer*r_ideology.center +
    religiosity.index +
    morality.index +
    r_age +
    r_income +
    r_gender +
    r_education +
    factor(year),
  data = df_all,
  subset = df_all$r_race == "Black",
  weights = weight)

summary(lm_partyideo)


# no race of interviewer
lm_partyideo_nri <- lm(
  r_party ~ 
    ideo_awareness_scaled.center*r_ideology.center +
    religiosity.index +
    morality.index +
    r_age +
    r_income +
    r_gender +
    r_education +
    factor(year),
  data = df_all,
  subset = df_all$r_race == "Black",
  weights = weight)

summary(lm_partyideo_nri)

se_main_list <- list(coeftest(lm_partyideo_nri, vcov = vcovCL, 
                              cluster = ~year, type = "HC1")[,2],
                     coeftest(lm_partyideo, vcov = vcovCL, 
                              cluster = ~year, type = "HC1")[,2],
                     coeftest(lm_partyideo_rc, vcov = vcovCL, 
                              cluster = ~year, type = "HC1")[,2])


# covariate labels
lm_main_labs <- c("Liberal-conservative familiarity",
                  "Ideology (Conservative)",
                  "Black interviewer",
                  "Racial consciousness",
                  "Religiosity",
                  "Traditional morality",
                  "Liberal-conservative familiarity X Ideology (Conservative)",
                  "Black interviewer X Ideology (Conservative)",
                  "Racial consciousness X Ideology (Conservative)")
# 
# stargazer(lm_partyideo_nri, lm_partyideo, lm_partyideo_rc,
#           se = se_main_list,
#           covariate.labels = lm_main_labs,
#           dep.var.labels = "Partisan identification (Republican)",
#           omit.stat = c("rsq", "f", "ser"),
#           star.cutoffs = my_stars,
#           label = "tab:lm_main_regs",
#           title = "Main regressions",
#           omit = c("r_age",
#                    "r_income",
#                    "r_education",
#                    "r_gender",
#                    "year"),
#           out = "jen_output/tables/lm_main_regs.tex")
# 
# 
stargazer(lm_partyideo_nri, lm_partyideo, lm_partyideo_rc,
          se = se_main_list,
          type = "html",
          dep.var.caption = "",
          covariate.labels = lm_main_labs,
          dep.var.labels = "Partisan identification (Republican)",
          omit.stat = c("rsq", "f", "ser"),
          star.cutoffs = my_stars,
          label = "tab:lm_main_regs",
          title = "Partisan identification and liberal-conservative familiarity",
          omit = c("r_age",
                   "r_income",
                   "r_education",
                   "r_gender",
                   "year"),
          out = "jen_output/tables/lm_main_regs.html")







# Interaction effect of ideology on partisan identities -------------------

start <- Sys.time()
ideo_libcon_me_nri <- cplot(lm_partyideo_nri,
                            x = "ideo_awareness_scaled.center", 
                            dx = "r_ideology.center", what = "effect",
                            vcov = vcovCL(lm_partyideo_nri, cluster = ~year, type = "HC1"),
                            draw = FALSE)
print(Sys.time()-start)

start <- Sys.time()
ideo_libcon_me <- cplot(lm_partyideo,
                        x = "ideo_awareness_scaled.center", 
                        dx = "r_ideology.center", what = "effect", 
                        vcov = vcovCL(lm_partyideo, cluster = ~year, type = "HC1"),
                        draw = FALSE)
print(Sys.time()-start)

start <- Sys.time()
ideo_libcon_me_rc <- cplot(lm_partyideo_rc,
                           x = "ideo_awareness_scaled.center", 
                           dx = "r_ideology.center", what = "effect", 
                           vcov = vcovCL(lm_partyideo_rc, cluster = ~year, type = "HC1"),
                           draw = FALSE)
print(Sys.time()-start)

ideo_libcon_me_df <- mutate(ideo_libcon_me_nri, model = "Model 1") %>% 
  rbind(mutate(ideo_libcon_me, model = "Model 2")) %>% 
  rbind( mutate(ideo_libcon_me_rc, model = "Model 3"))

g_ideo_libcon_me_all <- ggplot(ideo_libcon_me_df, aes(x=xvals)) +
  geom_line(aes(y = yvals, color = model)) +
  geom_ribbon(aes(ymin = lower, ymax = upper, fill=model), 
              alpha = 0.2) +
  geom_hline(yintercept = 0, linetype = 2) + 
  labs(x = "Liberal-Conservative Familiarty\nscore (scaled and centered)", 
       y = "Average Marginal Effect of Ideology\n(Conservative) on Party (Republican)",
       color = "Model", fill = "Model") +
  scale_fill_viridis_d() +
  scale_color_viridis_d()
g_ideo_libcon_me_all

g_ideo_libcon_me1 <-ggplot(ideo_libcon_me_nri, aes(x=xvals)) +
  geom_line(aes(y = yvals)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), 
              alpha = 0.2) +
  geom_hline(yintercept = 0, linetype = 2)+ 
  labs(x = "Liberal-Conservative Familiarty\nscore (scaled and centered)", 
       y = "Average Marginal Effect of Ideology\n(Conservative) on Party (Republican)")
g_ideo_libcon_me1

ggsave("jen_output/figures/ideo_libcon_me1.jpeg", g_ideo_libcon_me1,
       height = 4, width = 5)
ggsave("jen_output/figures/ideo_libcon_me_all.pdf", g_ideo_libcon_me_all,
       height = 6, width = 8)



# Policy (agg) regressed on party and ideology (interacted) --------

lm_econp <- lm(econ_policy.index~ 
                 r_ideology.center*r_party.center +
                 r_age + r_gender + r_education + r_income +
                 factor(year), df_all,
               subset = df_all$r_race == "Black",
               weights = df_all$weight)
lm_socp <- lm(social_policy.index~
                r_ideology.center*r_party.center +
                r_age + r_gender + r_education + r_income +
                factor(year), df_all,
              subset = df_all$r_race == "Black",
              weights = df_all$weight)


lm_econp_var <- lm(econ_policy.index~ 
                     r_ideology.center*r_party.center +
                     morality.index.center + religiosity.index.center +
                     r_age + r_gender + r_education + r_income +
                     factor(year), df_all,
                   subset = df_all$r_race == "Black",
                   weights = df_all$weight)
lm_socp_var <- lm(social_policy.index~
                    r_ideology.center*r_party.center +
                    morality.index.center + religiosity.index.center +
                    r_age + r_gender + r_education + r_income +
                    factor(year), df_all,
                  subset = df_all$r_race == "Black",
                  weights = df_all$weight)

summary(lm_econp)
summary(lm_socp)

summary(lm_econp_var)
summary(lm_socp_var)


se_policy_list <- list(coeftest(lm_econp, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_socp, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_econp_var, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_socp_var, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2])


policy_covlabs <- c("Ideology (Conservative)", 
                    "Party (Republican)",
                    "Morality Index",
                    "Religiosity Index",
                    "Ideology x Party")


stargazer(lm_econp, lm_socp, lm_econp_var,lm_socp_var, 
          se = se_policy_list,
          type = "html",
          dep.var.caption = "Policy preferences",
          covariate.labels = policy_covlabs,
          dep.var.labels = c("Econ.", "Social", "Econ.", "Social"),
          omit.stat = c("rsq", "f", "ser"),
          star.cutoffs = my_stars,
          title = "Policy preferences, ideology, and party among Black Americans",
          omit = c("r_age",
                   "r_income",
                   "r_education",
                   "r_gender",
                   "year"),
          out = "jen_output/tables/policy_regs.html")

start <- Sys.time()
ideo_party_pol_econp <- cplot(lm_econp,
                              dx = "r_ideology.center", 
                              x = "r_party.center", what = "effect", 
                              vcov = vcovCL(lm_econp, cluster = ~year, type = "HC1"),
                              draw = FALSE)
print(Sys.time()-start)

start <- Sys.time()
ideo_party_pol_socp <- cplot(lm_socp,
                             dx = "r_ideology.center", 
                             x = "r_party.center", what = "effect", 
                             vcov = vcovCL(lm_socp, cluster = ~year, type = "HC1"),
                             draw = FALSE)
print(Sys.time()-start)


start <- Sys.time()
ideo_party_pol_socp_var <- cplot(lm_socp_var,
                                 dx = "r_ideology.center", 
                                 x = "r_party.center", what = "effect", 
                                 vcov = vcovCL(lm_socp_var, cluster = ~year, type = "HC1"),
                                 draw = FALSE)
print(Sys.time()-start)

start <- Sys.time()
ideo_party_pol_econp_var <- cplot(lm_econp_var,
                                  dx = "r_ideology.center", 
                                  x = "r_party.center", what = "effect", 
                                  vcov = vcovCL(lm_econp_var, cluster = ~year, type = "HC1"),
                                  draw = FALSE)
print(Sys.time()-start)


ideo_party_pol_df <- mutate(ideo_party_pol_econp, model = "Model 1") %>% 
  rbind(mutate(ideo_party_pol_socp, model = "Model 2")) %>% 
  rbind(mutate(ideo_party_pol_econp_var, model = "Model 3")) %>% 
  rbind(mutate(ideo_party_pol_socp_var, model = "Model 4"))

g_ideo_party_pol <- ggplot(ideo_party_pol_df, aes(x=xvals)) +
  geom_line(aes(y = yvals, color = model)) +
  geom_ribbon(aes(ymin = lower, ymax = upper, fill=model), 
              alpha = 0.2) +
  geom_hline(yintercept = 0, linetype = 2) + 
  labs(x = "Party (Republican)", 
       y = "Average Marginal Effect of Ideology (Conservative) on Policy Preferences",
       color = "Model", fill = "Model") +
  scale_fill_viridis_d() +
  facet_wrap(~model) +
  scale_color_viridis_d() +
  theme(legend.position = "none")

g_ideo_party_pol

ggsave("jen_output/figures/ideo_party_pol.pdf", g_ideo_party_pol,
       height = 6, width = 8)


# DO IT FOR WHITE PEOPLE
lm_econp_w <- lm(econ_policy.index~ 
                   r_ideology.center*r_party.center +
                   r_age + r_gender + r_education + r_income +
                   factor(year), df_all,
                 subset = df_all$r_race == "White",
                 weights = df_all$weight)
lm_socp_w <- lm(social_policy.index~
                  r_ideology.center*r_party.center +
                  r_age + r_gender + r_education + r_income +
                  factor(year), df_all,
                subset = df_all$r_race == "White",
                weights = df_all$weight)


lm_econp_var_w <- lm(econ_policy.index~ 
                       r_ideology.center*r_party.center +
                       morality.index.center + religiosity.index.center +
                       r_age + r_gender + r_education + r_income +
                       factor(year), df_all,
                     subset = df_all$r_race == "White",
                     weights = df_all$weight)
lm_soc_var_w <- lm(social_policy.index~
                     r_ideology.center*r_party.center +
                     morality.index.center + religiosity.index.center +
                     r_age + r_gender + r_education + r_income +
                     factor(year), df_all,
                   subset = df_all$r_race == "White",
                   weights = df_all$weight)

summary(lm_econp_w)
summary(lm_socp_w)

summary(lm_econp_var_w)
summary(lm_soc_var_w)


se_policy_list_w <- list(coeftest(lm_econp_w, vcov = vcovCL, 
                                  cluster = ~year, type = "HC1")[,2],
                         coeftest(lm_socp_w, vcov = vcovCL, 
                                  cluster = ~year, type = "HC1")[,2],
                         coeftest(lm_econp_var_w, vcov = vcovCL, 
                                  cluster = ~year, type = "HC1")[,2],
                         coeftest(lm_soc_var_w, vcov = vcovCL, 
                                  cluster = ~year, type = "HC1")[,2])

stargazer(lm_econp_w, lm_socp_w, lm_econp_var_w, lm_soc_var_w, 
          se = se_policy_list_w,
          type = "html",
          dep.var.caption = "Policy preferences",
          covariate.labels = policy_covlabs,
          dep.var.labels = c("Econ.", "Social", "Econ.", "Social"),
          omit.stat = c("rsq", "f", "ser"),
          star.cutoffs = my_stars,
          title = "Policy preferences, ideology, and party among White Americans",
          omit = c("r_age",
                   "r_income",
                   "r_education",
                   "r_gender",
                   "year"),
          out = "jen_output/tables/policy_regs_w.html")




# Individual policy, ideology, and party regressions -----------------------


## DO THE SAME WITH INDIVIDUAL POLICIES

# get list of lms and ses for various policie vars
policy_var <- colnames(select(df_all,starts_with("policy_") &
                                !ends_with("center")))
ind_var <- "~r_ideology.center*r_party.center +
                  morality.index.center + religiosity.index.center +
                  r_age + r_gender + r_education + r_income +
                  factor(year)" 
policy_lms <- lapply(policy_var, function(x) lm(formula(paste0(x, ind_var)), df_all,
                                                subset = df_all$r_race == "Black",
                                                weights = df_all$weight))
policy_ses <- lapply(policy_lms, function(x) coeftest(x, vcov = vcovCL, 
                                                      cluster = ~year, type = "HC1")[,2])
policy_labs <- c("Healthcare", "Job guar.", "Gov. spending",
                 "Abortion", "Immigration", "Affirmative action")


stargazer(policy_lms,
          se = policy_ses,
          type = "html",
          dep.var.caption = "Policy preferences",
          covariate.labels = policy_covlabs,
          dep.var.labels.include=FALSE,
          column.labels = policy_labs,
          omit.stat = c("rsq", "f", "ser"),
          star.cutoffs = my_stars,
          title = "Policy preferences (seperated out), ideology, and party among Black Americans",
          omit = c("r_age",
                   "r_income",
                   "r_education",
                   "r_gender",
                   "year"),
          out = "jen_output/tables/policy_regs_ind.html")

lapply(policy_var, function(x) table(df_all$year,pull(df_all,x)))


start <- Sys.time()
ideo_party_indpolicy_df <- lapply(policy_lms, function(x) cplot(x,
                                                                dx = "r_ideology.center", 
                                                                x = "r_party.center", what = "effect", 
                                                                vcov = vcovCL(x, cluster = ~year, type = "HC1"),
                                                                draw = FALSE))
print(Sys.time()-start)

ideo_party_indpolicy_df_bound <- bind_rows(ideo_party_indpolicy_df,
                                           .id = "model") %>% 
  mutate(model = paste("Model", model))

g_ideo_party_indpol <- ggplot(ideo_party_indpolicy_df_bound, aes(x=xvals)) +
  geom_line(aes(y = yvals, color = model)) +
  geom_ribbon(aes(ymin = lower, ymax = upper, fill=model), 
              alpha = 0.2) +
  geom_hline(yintercept = 0, linetype = 2) + 
  labs(x = "Party (Republican)", 
       y = "Average Marginal Effect of Ideology (Conservative) on Policy Preferences") +
  scale_fill_viridis_d() +
  facet_wrap(~model) +
  scale_color_viridis_d() +
  theme(legend.position = "none")
g_ideo_party_indpol

ggsave("jen_output/figures/ideo_party_indpol.pdf", g_ideo_party_indpol,
       height = 6, width = 8)

# Policy regressed on familiarity and ideology (interacted) ---------------
lm_econp_fs <- lm(econ_policy.index~ 
                    r_ideology.center*ideo_awareness_scaled.center +
                    r_age + r_gender + r_education + r_income +
                    factor(year), df_all,
                  subset = df_all$r_race == "Black",
                  weights = df_all$weight)
lm_socp_fs <- lm(social_policy.index~
                   r_ideology.center*ideo_awareness_scaled.center +
                   r_age + r_gender + r_education + r_income +
                   factor(year), df_all,
                 subset = df_all$r_race == "Black",
                 weights = df_all$weight)


lm_econp_var_fs <- lm(econ_policy.index~ 
                        r_ideology.center*ideo_awareness_scaled.center +
                        morality.index.center + religiosity.index.center +
                        r_age + r_gender + r_education + r_income +
                        factor(year), df_all,
                      subset = df_all$r_race == "Black",
                      weights = df_all$weight)
lm_soc_var_fs <- lm(social_policy.index~
                      r_ideology.center*ideo_awareness_scaled.center +
                      morality.index.center + religiosity.index.center +
                      r_age + r_gender + r_education + r_income +
                      factor(year), df_all,
                    subset = df_all$r_race == "Black",
                    weights = df_all$weight)

summary(lm_econp_fs)
summary(lm_socp_fs)

summary(lm_econp_var_fs)
summary(lm_soc_var_fs)


se_policy_list <- list(coeftest(lm_econp_fs, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_socp_fs, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_econp_var_fs, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_soc_var_fs, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2])


policy_covlabs_fs <- c("Ideology (Conservative)", 
                       "Liberal-Conservative Familiarity Score",
                       "Morality Index",
                       "Religiosity Index",
                       "Ideology x Familiarity Score")


stargazer(lm_econp_fs, lm_socp_fs, lm_econp_var_fs,lm_soc_var_fs, 
          se = se_policy_list,
          type = "html",
          dep.var.caption = "Policy preferences",
          covariate.labels = policy_covlabs_fs,
          dep.var.labels = c("Econ.", "Social", "Econ.", "Social"),
          omit.stat = c("rsq", "f", "ser"),
          star.cutoffs = my_stars,
          title = "Policy preferences, ideology, and liberal-conservative familiarity score among Black Americans",
          omit = c("r_age",
                   "r_income",
                   "r_education",
                   "r_gender",
                   "year"),
          out = "jen_output/tables/policy_regs_fs.html")


# Share of black respondents obama = cons but i = lib ---------------------
df_all %>% 
  # filter to relevant years
  filter(year == 2012 | year == 2008,
         !is.na(r_ideology_name), r_race == "Black") %>% 
  # allow conservative for obama
  group_by(dpc_correct, r_ideology_name) %>% 
  tally() %>% 
  ungroup() %>% 
  mutate(prop = n /sum(n)) %>% 
  mutate(dpc_correct = ifelse(dpc_correct == 1, "Obama is liberal or moderate",
                              "Obama is conservative")) %>% 
  rename(` ` = dpc_correct,
         `Respondent ideology` = r_ideology_name) %>% 
  xtable(digits = c(rep(0, 4), 3),
         caption = "Proportion of Black respondents say Obama is conservative and they are liberal (2008, 2012)") %>% 
  print(include.rownames= FALSE, type = "html", caption.placement="top") %>% 
  cat(file = "jen_output/tables/obama_resp_ideo_tab.html")


# Correlation matrix of ideology and every possible variable --------------

g_ideocor <- df_all %>% 
  filter(r_race == "Black") %>% 
  mutate(male = ifelse(r_gender == "Man", 1, 0), 
         across(everything(), ~as.numeric(.x))) %>% 
  select(r_ideology, familiarty_score = ideo_awareness_scaled, r_party,
         starts_with("ft") & !ends_with("center"),
         ends_with("index"), starts_with("policy") & !ends_with("center"),
         r_income, r_education, r_age, male)  %>% 
  cor(use = "pairwise.complete.obs") %>% 
  data.frame() %>% 
  select(r_ideology) %>% 
  rownames_to_column("Variable") %>% 
  mutate(Variable = factor(Variable, levels = rev(Variable))) %>% 
  ggplot() +
  geom_label(aes(x= Variable, y = r_ideology, 
                 label = round(r_ideology,3) #, fill = r_ideology
  )) +
  labs(y = "Correlation coefficients (r) with ideology") +
  ylim(-1,1) +
  coord_flip()
g_ideocor

ggsave(g_ideocor, filename = "jen_output/figures/ideocor.pdf",
       height = 7, width = 6)

# Mechanisms for differential familiarity scores ---------------------------

# look at our media var
df_all %>% 
  filter(r_race == "White" |r_race == "Black",
         year %in% seq(2008, 2016, 4)) %>% 
  group_by(year, r_race, media.index) %>% 
  tally() %>% 
  mutate(total = sum(n),
         prop = n/total) %>% 
  ungroup() %>% 
  ggplot(aes(media.index, prop, color = r_race)) +
  geom_smooth(se = FALSE)+
  geom_point(size = .6) +
  ylim (0, 0.2) +
  facet_grid(~year)


df_all %>% 
  filter(r_race == "White" |r_race == "Black",
         year %in% seq(2008, 2016, 4)) %>% 
  group_by(year, r_race, media.index2) %>% 
  tally() %>% 
  mutate(total = sum(n),
         prop = n/total) %>% 
  ungroup() %>% 
  ggplot(aes(media.index2, prop, color = r_race)) +
  geom_smooth(se = FALSE)+
  geom_point(size = .6) +
  facet_grid(~year)

df_all %>% 
  filter(r_race == "White" |r_race == "Black",
         year %in% seq(2004, 2016, 4)) %>% 
  group_by(year,r_race, days.media.index) %>% 
  tally() %>% 
  mutate(prop = n/sum(n)) %>% 
  ungroup() %>% 
  ggplot(aes(days.media.index, prop, color = r_race)) +
  geom_smooth(se = FALSE)+
  geom_point(size = .6) +
  facet_grid(~year)


## regressions for familiarity score

lm_predict_famscore_race <- lm(ideo_awareness_scaled ~ 
                                 r_race +
                                 factor(year), 
                               data = df_all,
                               subset =  (df_all$r_race == "Black" | df_all$r_race ==  "White") &
                                 (df_all$year %in% seq(2008, 2016, 4)),
                               weights = df_all$weight)

lm_predict_famscore_nomedia <- lm(ideo_awareness_scaled ~ 
                                    r_race +
                                    r_age +
                                    r_education +
                                    r_income +
                                    ofcrec.index2 +
                                    politics_attention +
                                    factor(year), 
                                  data = df_all,
                                  subset =  (df_all$r_race == "Black" | df_all$r_race ==  "White") &
                                    (df_all$year %in% seq(2008, 2016, 4)),
                                  weights = df_all$weight)

lm_predict_famscore_all <- lm(ideo_awareness_scaled ~ 
                                r_race +
                                media.index2 +
                                r_age+
                                r_education +
                                r_income +
                                ofcrec.index2 +
                                politics_attention +
                                factor(year), 
                              data = df_all,
                              subset = (df_all$r_race == "Black" | df_all$r_race ==  "White"),
                              weights = df_all$weight)

summary(lm_predict_famscore_race)
summary(lm_predict_famscore_nomedia)
summary(lm_predict_famscore_all)

lm(ideo_awareness_scaled ~ 
     r_race +
     media.index +
     r_age+
     r_education +
     r_income +
     factor(year), 
   data = df_all,
   subset = (df_all$r_race == "Black" | df_all$r_race ==  "White"),
   weights = df_all$weight) %>% 
  summary()


predict_famscore_covlabs <- c("Race (Black)", 
                              "News Media Index",
                              "Age",
                              "Education",
                              "Income",
                              "Office Recognition (3 ques.)",
                              "Attention to Politics")

se_predict_famscore <- list(coeftest(lm_predict_famscore_race, vcov = vcovCL, 
                                     cluster = ~year, type = "HC1")[,2],
                            coeftest(lm_predict_famscore_all, vcov = vcovCL, 
                                     cluster = ~year, type = "HC1")[,2])


stargazer(lm_predict_famscore_race, 
          lm_predict_famscore_all,
          se = se_predict_famscore,
          type = "html",
          dep.var.caption = "",
          covariate.labels = predict_famscore_covlabs,
          dep.var.labels ="Liberal-Conservative Familiarity Score",
          omit.stat = c("rsq", "f", "ser"),
          star.cutoffs = my_stars,
          title = "Race gap for liberal-conservative familiarity score",
          omit = c("year"),
          out = "jen_output/tables/predict_famscore.html")






# What predicts unsorted/sorted respondents among race? ------------------
# input a sorted var
df_sorted <- df_all %>% 
  filter(r_party != .5, !is.na(r_party)) %>% 
  mutate(sorted = ifelse(
    # liberal republicans
    r_party > .5 & r_ideology < .5 |
      # conservative dems
      r_party < .5 & r_ideology > .5, 0, 1))

df_sorted %>% 
  filter(r_race == "Black") %>% 
  group_by(year, sorted) %>% 
  tally() %>% 
  View()

lm_predict_sorted <- lm(sorted ~ 
                          ideo_awareness_scaled +
                          r_age +
                          r_education +
                          r_income +
                          media.index2 +
                          ofcrec.index2 +
                          politics_attention +
                          factor(year), 
                        data = df_sorted,
                        subset = (df_sorted$r_race == "Black"),
                        weights = df_sorted$weight)

lm_predict_sorted_w <- lm(sorted ~ 
                            ideo_awareness_scaled +
                          r_age +
                          r_education +
                          r_income +
                          media.index2 +
                          ofcrec.index2 +
                          politics_attention +
                          factor(year), 
                        data = df_sorted,
                        subset = (df_sorted$r_race == "White"),
                        weights = df_sorted$weight)




predict_sorted_covlabs <- c("Familiarity Score",
                            "Age",
                            "Education",
                            "Income",
                            "News Media Index",
                            "Office Recognition (3 ques.)",
                            "Attention to Politics")


se_predict_sorted <- list(coeftest(lm_predict_sorted, vcov = vcovCL, 
                                    cluster = ~year, type = "HC1")[,2],
                           coeftest(lm_predict_sorted_w, vcov = vcovCL, 
                                    cluster = ~year, type = "HC1")[,2])


# write it out
stargazer(lm_predict_sorted, 
          lm_predict_sorted_w,
          se = se_predict_sorted,
          type = "html",
          dep.var.caption = "",
          covariate.labels = predict_sorted_covlabs,
          column.labels =c("Black Americans", "White Americans"),
          omit.stat = c("rsq", "f", "ser"),
          star.cutoffs = my_stars,
          title = "Predicting who is sorted (liberal/moderate Democrats and conservative/moderate Republicans)",
          omit = c("year"),
          out = "jen_output/tables/predict_sorted.html")



